# Evaluation grid on the time axis: 0.0, 0.1, ..., 10.0 (101 points).
# NOTE(review): the name `cut` masks base::cut() once assigned — confirm no
# later code in this analysis relies on calling base::cut().
cut <- seq(0, 10, by = 0.1)
n <- 500

# Simulating Interval-Censoring ----
Simulation Parameters
Data Generating Processes
The effect sizes of the risk factors are similar to those of the UMOD SNP.
Other Parameters
DGPs
# Log-hazard formulas passed to sim_pexp() / sim_weibull().
# NOTE(review): formula_pexp and formula_weibull are each assigned twice; if
# this file is sourced top to bottom, the fixed-effects versions overwrite the
# baseline-only versions. Presumably these lived in separate notebook chunks —
# confirm before running as a single script.

## Baseline-hazard analysis (no covariates)
formula_pexp <- "~ -3.5 + dgamma(t, 8, 2) * 6"
formula_weibull <- "~ -3.5"

## Fixed-effects analysis (adds a linear effect of x1)
formula_pexp <- "~ -3.5 + dgamma(t, 8, 2) * 6 -1.3*x1"
formula_weibull <- "~ -3.5 -1.3*x1"

# Implicit log-hazard formula for sim_icenReg: Weibull log-hazard with
# shape 1.5 and scale 4.
# NOTE(review): this implies shape = 1.5, but the sim_icenReg parameter block
# below sets shape = 1 — confirm which value is intended.
formula_icenReg <- "~log(1.5) - log(4) + (1.5 - 1) * (log(t) - log(4))"
# Interval-censoring parameters per data-generating process.
# NOTE(review): several names repeat across DGP sections (visits_min,
# visits_max, ic_mechanism, shape); if run top to bottom, later assignments
# overwrite earlier ones. Presumably each section belongs to its own chunk.

# sim_pexp
visits_min <- 1
visits_max <- 10
ic_mechanism <- c("beta", "uniform", "equidistant")

# sim_weibull
shape <- 1.5
ic <- TRUE
visits_min <- 1
visits_max <- 10
ic_mechanism <- c("beta", "uniform", "equidistant")

# sim_icenReg
scale <- 4
# NOTE(review): shape = 1 conflicts with the shape-1.5 Weibull implied by
# formula_icenReg above — confirm.
shape <- 1
inspections <- 2
inspectLength <- 2.5

# Censoring Mechanisms ----
interval_censor_individual_beta <- function(event_time, event_status, visits_min, visits_max, max_time, round) {
  # Interval-censor a single observation using visit times drawn from a
  # randomly parameterized Beta distribution scaled to (0, max_time).
  #
  # Args:
  #   event_time:   true event (or censoring) time, assumed >= 0
  #   event_status: 1 = event observed, 0 = censored
  #   visits_min, visits_max: range for the random number of visits
  #   max_time:     administrative end of follow-up
  #   round:        digits to round visit times to, or NULL for no rounding
  #                 (NOTE(review): parameter name shadows base::round inside
  #                 this function; kept for backward compatibility)
  #
  # Returns: named vector c(time_ic_start, time_ic_stop, status_ic).
  v <- sample(visits_min:visits_max, 1)
  # Two independent standard normals; abs() + 0.5 keeps both Beta shape
  # parameters strictly positive. Equivalent to the former
  # mvtnorm::rmvnorm(1, mean = c(0, 0), sigma = diag(2)) draw, without the
  # extra package dependency.
  params <- rnorm(2)
  shape1 <- abs(params[1]) + 0.5
  shape2 <- abs(params[2]) + 0.5
  # v interior quantiles, excluding the endpoints 0 and 1
  quantiles <- seq(0, 1, length.out = v + 2)[-c(1, v + 2)]
  obs_times <- sort(qbeta(quantiles, shape1 = shape1, shape2 = shape2) * max_time)
  if (!is.null(round)) {
    obs_times <- round(obs_times, round)
    # Rounding can create duplicate visit times or push a visit to exactly 0;
    # drop both so every censoring interval has positive width.
    obs_times <- sort(unique(obs_times[obs_times > 0]))
    if (length(obs_times) == 0) {
      obs_times <- max_time
    }
  }
  obs_times_full <- c(0, obs_times)
  interval_index <- findInterval(event_time, obs_times_full)
  if (interval_index == length(obs_times_full)) {
    # Event time exceeds the last visit: right-censored in the final interval.
    time_ic_start <- obs_times_full[interval_index - 1]
    time_ic_stop <- obs_times_full[interval_index]
    status_ic <- 0
  } else {
    # Event time falls between two observed visit times.
    time_ic_start <- obs_times_full[interval_index]
    time_ic_stop <- obs_times_full[interval_index + 1]
    status_ic <- if (event_status == 1) 1 else 0
  }
  c(time_ic_start = time_ic_start,
    time_ic_stop = time_ic_stop,
    status_ic = status_ic)
}
interval_censor_individual_uniform <- function(event_time, event_status, visits_min, visits_max, max_time, round_digits) {
  # Interval-censor a single observation using evenly spaced visit times
  # jittered by Gaussian noise with a randomly drawn standard deviation.
  #
  # Args:
  #   event_time:   true event (or censoring) time, assumed >= 0
  #   event_status: 1 = event observed, 0 = censored
  #   visits_min, visits_max: range for the random number of visits
  #   max_time:     administrative end of follow-up
  #   round_digits: digits to round visit times to, or NULL for no rounding
  #
  # Returns: named vector c(time_ic_start, time_ic_stop, status_ic).
  v <- sample(visits_min:visits_max, 1)
  # Half-normal + 0.1 keeps the jitter sd strictly positive.
  param <- rnorm(1, mean = 0, sd = 1)
  jitter_sd <- abs(param) + 0.1
  # v evenly spaced interior visit times on (0, max_time)
  even_times <- seq(0, max_time, length.out = v + 2)[-c(1, v + 2)]
  jittered_times <- even_times + rnorm(v, mean = 0, sd = jitter_sd)
  # Keep only jittered times strictly inside (0, max_time).
  jittered_times <- sort(jittered_times[jittered_times > 0 & jittered_times < max_time])
  if (length(jittered_times) == 0) {
    # All visits jittered out of range: fall back to a single visit at max_time.
    jittered_times <- max_time
  }
  if (!is.null(round_digits)) {
    jittered_times <- round(jittered_times, round_digits)
    # Rounding can create duplicate visit times or push a visit to exactly 0;
    # drop both so every censoring interval has positive width.
    jittered_times <- sort(unique(jittered_times[jittered_times > 0]))
    if (length(jittered_times) == 0) {
      jittered_times <- max_time
    }
  }
  obs_times_full <- c(0, jittered_times)
  interval_index <- findInterval(event_time, obs_times_full)
  if (interval_index == length(obs_times_full)) {
    # Event time exceeds the last visit: right-censored in the final interval.
    time_ic_start <- obs_times_full[interval_index - 1]
    time_ic_stop <- obs_times_full[interval_index]
    status_ic <- 0
  } else {
    # Event time falls between two observed visit times.
    time_ic_start <- obs_times_full[interval_index]
    time_ic_stop <- obs_times_full[interval_index + 1]
    status_ic <- if (event_status == 1) 1 else 0
  }
  c(time_ic_start = time_ic_start,
    time_ic_stop = time_ic_stop,
    status_ic = status_ic)
}
interval_censor_individual_equidistant <- function(event_time, event_status, visits_min, visits_max, max_time, round_digits) {
  # Interval-censor a single observation on a fixed equidistant grid of
  # visits_max intervals over [0, max_time].
  #
  # Args:
  #   event_time:   true event (or censoring) time
  #   event_status: 1 = event observed, 0 = censored; passed through unchanged
  #   visits_min:   unused here; kept so the signature matches the beta and
  #                 uniform mechanisms
  #   visits_max:   number of equidistant intervals
  #   max_time:     administrative end of follow-up
  #   round_digits: digits to round the grid to, or NULL for no rounding
  #
  # Returns: named vector c(time_ic_start, time_ic_stop, status_ic).
  breaks <- seq(0, max_time, length.out = visits_max + 1)
  if (!is.null(round_digits)) {
    breaks <- round(breaks, round_digits)
    # Rounding may collapse adjacent breaks; deduplicate to keep a valid grid.
    breaks <- sort(unique(breaks))
  }
  interval_index <- findInterval(event_time, breaks, rightmost.closed = TRUE)
  # Clamp to a valid interval (handles event_time outside [0, max_time]).
  interval_index <- min(max(interval_index, 1), length(breaks) - 1)
  time_ic_start <- breaks[interval_index]
  time_ic_stop <- breaks[interval_index + 1]
  # NOTE(review): unlike the beta/uniform mechanisms, the status is passed
  # through even when event_time exceeds the last break — confirm that times
  # beyond follow-up already carry status 0 upstream.
  status_ic <- event_status
  c(time_ic_start = time_ic_start,
    time_ic_stop = time_ic_stop,
    status_ic = status_ic)
}

# Models ----
# batchtools-style wrapper for fitting a PAM (piecewise-exponential additive
# model) to one simulated instance.
# NOTE(review): body is empty here — presumably stubbed for this document or
# filled in elsewhere; confirm.
#   data, job, instance: standard batchtools algorithm arguments — TODO confirm
#   bs:       smooth basis type for the baseline (presumably an mgcv basis,
#             default "ps" = P-splines — confirm)
#   k:        basis dimension for the baseline smooth
#   ic_point: which point of the censoring interval is used as the event time
wrapper_pam <- function(
data,
job,
instance,
bs = "ps",
k = 10,
ic_point = c("mid", "end", "mid_end", "exact", "oracle")) {}
# batchtools-style wrapper for fitting a Cox proportional-hazards model to one
# simulated instance.
# NOTE(review): body is empty here — presumably stubbed for this document or
# filled in elsewhere; confirm.
#   data, job, instance: standard batchtools algorithm arguments — TODO confirm
#   ic_point: which point of the censoring interval is used as the event time
wrapper_cox <- function(
data,
job,
instance,
ic_point = c("mid", "end", "exact", "oracle")) {}
# batchtools-style wrapper for fitting a Weibull survival model to one
# simulated instance.
# NOTE(review): body is empty here — presumably stubbed for this document or
# filled in elsewhere; confirm.
#   data, job, instance: standard batchtools algorithm arguments — TODO confirm
#   ic_point: interval point used as the event time; "adjustment" presumably
#             means a likelihood-based interval-censoring adjustment — confirm
#   fct:      fitting backend, flexsurv::flexsurvreg or survival::survreg
wrapper_weibull <- function(
data,
job,
instance,
ic_point = c("mid", "end", "exact", "oracle", "adjustment"),
fct = c("flexsurvreg", "survreg")
) {}

# Fixed Effects ----
Coefficients
Piecewise Exponential DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Weibull DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Coverages
Piecewise Exponential DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Weibull DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Conclusion
- Coverage of fixed effect hazards is as desired across all DGPs, IC mechanisms, algorithms, and IC points
Baseline Hazards
Illustration of individual baseline log-hazard estimations
Piecewise Exponential DGP
pexp beta pam mid
pexp uniform pam mid
pexp equidistant pam mid
Weibull DGP
weibull beta pam mid
weibull uniform pam mid
weibull equidistant pam mid
icenReg DGP
icenReg uniform pam mid
Illustration of individual baseline cumulative hazard estimations
Piecewise Exponential DGP
pexp beta pam mid
pexp uniform pam mid
pexp equidistant pam mid
Weibull DGP
weibull beta pam mid
weibull uniform pam mid
weibull equidistant pam mid
icenReg DGP
icenReg uniform pam mid
Coverage of Baseline Log-Hazards
Piecewise Exponential DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Weibull DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
icenReg DGP
Uniform IC Mechanism
Coverage of Baseline Cumulative Hazards
Piecewise Exponential DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Weibull DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
icenReg DGP
Uniform IC Mechanism
Coverage of Baseline Survival Functions
Piecewise Exponential DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
Weibull DGP
Beta IC Mechanism
Uniform IC Mechanism
Equidistant IC Mechanism
icenReg DGP
Uniform IC Mechanism
Conclusion
- If adjustment for IC is not possible, the interval midpoint is always the better choice than the interval endpoint
- The coverage of baseline loghazards by PAMs is between 50% and 75%, depending on specifications
- The coverage of baseline cumulative hazards by PAMs is much higher, between 85% and 97%, depending on specifications
- The coverage of baseline survival functions by PAMs is similar to that of the baseline cumulative hazards
- The stark difference between coverage of baseline loghazards versus baseline cumulative hazards is likely due to poor estimation / bias of loghazards for very small and very large t